* Start from an empty session before changing any limits.
clear all
* Do not pause long output with --more-- prompts.
set more off
* Raise the matrix-size and variable-count limits.
* NOTE(review): nothing visible in this section obviously needs limits this
* high; presumably kept for consistency with other do-files -- confirm.
set matsize 10000
set maxvar 10000
* Add the project's gslab_tools directory to the end of the ado search path.
* NOTE(review): no command from that directory is visibly called in this section.
adopath + ../code/gslab_tools/

/***********
*Variation analysis 2x2
*********/

/*******
* Create Active vs. Active Table
*********/

* Load the arm-level file and discard rows without a year.
use "../derived/Combined/combined_arm_level.dta", clear
drop if year == .
* NOTE(review): no random-number call is visible in this section; the seed is
* presumably set for reproducibility of downstream steps -- confirm.
set seed 22

* Restrict to active-vs-active combinations (never / always / both flags)
* that are flagged as having variation and that report at least one of the
* two outcome measures. The filters are applied one after another.
keep if active_active_never_combo | active_active_always_combo | active_active_both_combo
keep if variation_combo
drop if share_respond == . & outcome_all == .

* Row counter: summed later to get the number of rows per table cell.
gen n = 1
keep studyname drug drug_combo_no sponsor sample scale share_respond outcome_all n

*Duplicate entries with two-way variation
* sponsor1 / sponsor2: the sponsor value of the first and of the second row of
* each study when its rows are ordered by drug, copied onto every row of that
* study. An explicit subscript that falls outside the by-group (a study with
* a single row) evaluates to missing.
bys studyname (drug): gen sponsor1 = sponsor[1]
bys studyname (drug): gen sponsor2 = sponsor[2]

* Table-driven version of the duplication step. For each listed combination:
*   - rows whose (sponsor1, sponsor2) pattern equals (dup_s1, dup_s2) are
*     duplicated, and the copies are moved to combo number + 9000;
*   - rows whose pattern equals (move_s1, move_s2) are moved to
*     combo number + 9000 without being duplicated.
* Position k in every list below belongs to the k-th combination.
local combos   38 130 195 228
local dup_s1    0   0   0   1
local dup_s2    1   0   1   1
local move_s1   0   1   0   0
local move_s2   0   0   0   1

forvalues k = 1/4 {
	local combo : word `k' of `combos'
	local d1    : word `k' of `dup_s1'
	local d2    : word `k' of `dup_s2'
	local m1    : word `k' of `move_s1'
	local m2    : word `k' of `move_s2'

	* is_copy marks the rows that expand has just appended.
	expand 2 if drug_combo_no == `combo' & sponsor1 == `d1' & sponsor2 == `d2', gen(is_copy)
	replace drug_combo_no = drug_combo_no + 9000 if is_copy | ///
		(drug_combo_no == `combo' & sponsor1 == `m1' & sponsor2 == `m2')
	drop is_copy
}

* Keep only the analysis variables; every other column is dropped here.
keep studyname n drug share_respond outcome_all drug_combo_no sponsor sample
* mean_sponsor: mean of sponsor over all rows of a drug within a combination.
* constant = 1 when that mean is exactly 0 or exactly 1, 0 otherwise.
bys drug_combo_no drug: egen mean_sponsor = mean(sponsor)
gen constant = mean_sponsor==1 | mean_sponsor==0


* count marks the first row of each study-drug pair. Its running sum within a
* study (rows are still ordered by drug from the previous line) numbers the
* study's drugs in sorted order, so count_drug==1 picks the first drug.
bys studyname drug: gen count = _n==1
bys studyname: gen count_drug = sum(count)
// These are on the diagonal, so both drugs vary and we need to assign one to
// be listed first in the table. 
replace constant = 1 if count_drug==1 & inlist(drug_combo_no,48,102,134,139,200) 
* Store the arm-level working data; it is reloaded once the combination names
* have been built.
save ../derived/Diff_in_diff_setup/temp.dta, replace

* Build one display label per combination of the form
* "<drug with constant==0> vs. <drug with constant==1>".
keep drug_combo_no constant drug
duplicates drop

* One row per combination, with the two drug names side by side as drug0/drug1.
reshape wide drug, i(drug_combo_no) j(constant)
gen drug_name = drug0 + " vs. " + drug1

* Only the key and the label are needed for the merge back.
keep drug_combo_no drug_name
save ../derived/Diff_in_diff_setup/combo_names.dta, replace

* Reload the arm-level working file and attach each combination's label.
use ../derived/Diff_in_diff_setup/temp.dta, clear
merge m:1 drug_combo_no using ../derived/Diff_in_diff_setup/combo_names.dta, nogenerate

* Use outcome_all as the outcome wherever share_respond is missing.
replace share_respond = outcome_all if share_respond == .

* sponsor_all = 1 on every row of a study/combination in which a row of the
* non-constant drug (constant == 0) has sponsor == 1.
gen varying_is_sponsored = (constant == 0) & (sponsor == 1)
bys studyname drug_combo_no: egen sponsor_all = max(varying_is_sponsored)

* Row counts and mean outcome per (combination, sponsor_all, constant) cell.
collapse (sum) n (mean) share_respond, by(drug_combo_no drug_name sponsor_all constant sample)

* Number the cells within a combination in (sponsor_all, constant) order.
* When all four cells exist: 1 = (0,0), 2 = (0,1), 3 = (1,0), 4 = (1,1).
bys drug_combo_no (sponsor_all constant): gen j = _n
drop sponsor_all constant
reshape wide share_respond n, i(drug_combo_no drug_name sample) j(j)

* Within-group differences and the difference-in-differences.
gen diff0 = share_respond1 - share_respond2
gen diff1 = share_respond3 - share_respond4
gen dd = diff1 - diff0

* Combined row count of cells 1 and 3; rows are listed from largest n down.
gen n = n1 + n3
gsort -n drug_name

* Final column set and layout.
keep drug_name share_respond3 share_respond4 diff1 n3 share_respond1 share_respond2 diff0 n1 dd sample n drug_combo_no
order drug_name share_respond3 share_respond4 diff1 n3 share_respond1 share_respond2 diff0 n1 dd n drug_combo_no

* Title-case the label but keep the separator as lower-case "vs.".
replace drug_name = subinstr(proper(drug_name), "Vs.", "vs.", .)
rename drug_name drug
save ../derived/Diff_in_diff_setup/two_by_two_active.dta, replace
	
*Last row of table	
* For each sample, compute the n-weighted mean row (with raw sums for the
* cell counts), stack the per-combination rows underneath it, keep only that
* sample's rows and write the result to its own file.
foreach grp in anti schiz {
	use ../derived/Diff_in_diff_setup/two_by_two_active.dta, clear
	collapse (rawsum) n3 n1 (mean) share_respond* dd* diff* [fw=n], by(sample)
	append using ../derived/Diff_in_diff_setup/two_by_two_active.dta
	keep if sample=="`grp'"
	save ../derived/Diff_in_diff_setup/two_by_two_active_`grp'_w_mean.dta, replace
}


/****
* Create Active vs Placebo Table
********/
* Load the arm-level file and keep active-vs-placebo combinations that are
* flagged as having variation and that report share_respond.
use "../derived/Combined/combined_arm_level.dta", clear
keep if year!=.
keep if active_placebo_combo==1 & variation_combo==1 & share_respond!=.
* Row counter: summed below to get the number of rows per table cell.
gen n=1

keep studyname n drug share_respond drug_combo_no sponsor
* sponsor_paper: the maximum of sponsor over all rows of the study.
bys studyname: egen sponsor_paper = max(sponsor)
* Row counts and mean outcome per (combination, sponsor_paper, drug) cell.
collapse (sum) n (mean) share_respond, by(drug_combo_no sponsor_paper drug)
gen placebo = drug=="placebo"
* Give each placebo row the name of the active drug in the same combination,
* taken from the neighbouring row (collapse leaves the data sorted by its
* by-variables, so the active drug sits directly before or after the placebo).
replace drug = drug[_n-1] if drug_combo_no[_n]==drug_combo_no[_n-1] & drug=="placebo"
replace drug = drug[_n+1] if drug_combo_no[_n]==drug_combo_no[_n+1] & drug=="placebo"

* Number the cells within a combination in (sponsor_paper, placebo) order.
* When all four cells exist: 1 = (0, active), 2 = (0, placebo),
* 3 = (1, active), 4 = (1, placebo).
* Variable names are written out in full: the abbreviations `sponsor' and
* `share' only resolve while variable abbreviation is switched on and no
* other variable shares the prefix.
bys drug_combo_no (sponsor_paper placebo): gen j=_n
drop sponsor_paper placebo
reshape wide share_respond n, i(drug_combo_no drug) j(j)
* Combined row count of cells 1 and 3; rows are listed from largest n down.
gen n = n1+n3
gsort -n drug

* Within-group differences and the difference-in-differences.
gen diff1 = share_respond3 - share_respond4
gen diff0 = share_respond1 - share_respond2
gen dd = diff1 - diff0
order drug share_respond3 share_respond4 diff1 n3 share_respond1 share_respond2 diff0 n1 dd n
keep drug share_respond3 share_respond4 diff1 n3 share_respond1 share_respond2 diff0 n1 dd n 
replace drug=proper(drug)
replace drug = drug + " vs. Placebo"
save ../derived/Diff_in_diff_setup/two_by_two.dta, replace
	
*Last row of table	
* n-weighted means (raw sums for the cell counts) across all combinations,
* with the per-combination rows stacked underneath.
collapse (rawsum) n3 n1 (mean) share_respond* dd* diff* [fw=n]
append using ../derived/Diff_in_diff_setup/two_by_two.dta
order share_respond3 share_respond4 diff1 n3 share_respond1 share_respond2 diff0 n1 dd
save ../derived/Diff_in_diff_setup/two_by_two_w_mean.dta, replace


* Clean up files
* Remove the intermediate datasets; capture keeps the script going when a
* file is already gone.
foreach f in two_by_two_active temp two_by_two combo_names {
	capture erase ../derived/Diff_in_diff_setup/`f'.dta
}